Evaluation of RISK survival models

This document highlights the use of the FRESA.CAD package

for the evaluation (RRPlot) and calibration of Cox models (CoxRiskCalibration) or logistic models (CalibrationProbPoissonRisk) of survival data.

Furthermore, it can be used to evaluate any risk index that returns the probability of a future event on an external data set.

This document will use the survival::rotterdam, and survival::gbsg data-sets to train and predict the risk of cancer recurrence after surgery. Both Cox and Logistic models will be trained and evaluated.

Here are some sample plots returned by the evaluated functions:

The libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Save the default graphics parameters so plots can be restored later with par(op)
op <- par(no.readonly = TRUE)
# Global pander formatting: 3 significant digits, wide tables, keep trailing zeros
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

Breast Cancer Royston-Altman data

data(gbsg, package="survival") and data(rotterdam, package="survival")

# Condition the rotterdam cohort into the training frame.
# Row names become the patient id; outcome columns are renamed to
# match the gbsg naming (rfstime / status).
gbsgdata <- gbsg
rownames(gbsgdata) <- gbsgdata$pid
gbsgdata$pid <- NULL

odata <- rotterdam
rownames(odata) <- odata$pid
odata$pid <- NULL
odata$rfstime <- odata$rtime
odata$status <- odata$recur
odata$rtime <- NULL
odata$recur <- NULL

# Keep only the covariates that the gbsg cohort also has
odata <- odata[, colnames(odata) %in% colnames(gbsgdata)]

# Recode the categorical tumor-size levels to representative numeric sizes
odata$size <- 10 * (odata$size == "<=20") +
  35 * (odata$size == "20-50") +
  60 * (odata$size == ">50")

# Design matrix with all pairwise interactions of the covariates
data <- as.data.frame(model.matrix(Surv(rfstime, status) ~ .*., odata))
data$`(Intercept)` <- NULL

dataBrestCancerTrain <- cbind(
  time   = odata[rownames(data), "rfstime"],
  status = odata[rownames(data), "status"],
  data
)

# Sanitize the interaction column names into valid identifiers.
# Same replacements as before, applied in the same order:
# ":" -> "_", remove spaces, "." -> "_", "-" -> "_", ">" -> "_"
colnames(dataBrestCancerTrain) <- str_replace_all(
  colnames(dataBrestCancerTrain),
  c(":" = "_", " " = "", "\\." = "_", "-" = "_", ">" = "_")
)

dataBrestCancerTrain$time <- dataBrestCancerTrain$time/365 ## To years


pander::pander(table(odata[rownames(data),"status"]),caption="rotterdam")
rotterdam
0 1
1464 1518

data(gbsg, package="survival") data conditioning

# Condition the gbsg validation cohort exactly like the training set:
# same covariates, same interaction design matrix, same name sanitizing.
gbsgdata <- gbsgdata[,colnames(odata)]
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,gbsgdata))

data$`(Intercept)` <- NULL

dataBrestCancerTest <- cbind(time=gbsgdata[rownames(data),"rfstime"],status=gbsgdata[rownames(data),"status"],data)

colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),":","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest)," ","")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"\\.","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"-","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),">","_")
dataBrestCancerTest$time <- dataBrestCancerTest$time/365 ## To years

# BUG FIX: the event table must come from the gbsg cohort, not from odata
# (rotterdam). Indexing odata by gbsg row names silently picked unrelated
# rotterdam patients that share the same pid values.
pander::pander(table(gbsgdata[rownames(data),"status"]), caption="gbsg")
gbsg
0 1
387 299

Cox Modeling

# BSWiMS bagged Cox model of Surv(time,status) on all features (1 loop, 5 repeats)
ml <- BSWiMS.model(Surv(time,status)~.,data=dataBrestCancerTrain,loops=1,NumberofRepeats = 5)

—–.

# Report the bagged-coefficient summary table of the selected features
sm <- summary(ml)
pander::pander(sm$coefficients)
  Estimate lower HR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
age_nodes 7.76e-04 1.001 1.001 1.001 0.626 0.601 0.633 0.630 0.602 0.634 0.028330 0.42972 12.26 13.415 3.21e-02 1.0
size_grade 5.02e-03 1.005 1.005 1.006 0.598 0.624 0.633 0.599 0.627 0.635 0.017876 0.37163 9.49 10.720 7.67e-03 1.0
nodes 8.25e-02 1.077 1.086 1.095 0.637 0.642 0.643 0.640 0.644 0.644 0.007344 0.06897 8.25 2.000 6.04e-05 1.0
size 6.77e-03 1.005 1.007 1.009 0.595 0.641 0.643 0.595 0.642 0.644 0.013742 0.33845 7.79 9.406 1.39e-03 1.0
size_nodes -3.61e-04 1.000 1.000 1.000 0.624 0.643 0.643 0.629 0.644 0.644 0.003445 0.34249 7.24 9.559 -3.56e-04 1.0
age_pgr -4.02e-06 1.000 1.000 1.000 0.548 0.631 0.635 0.544 0.634 0.637 0.009210 0.18985 6.33 5.482 3.16e-03 0.2
grade 1.97e-01 1.138 1.218 1.303 0.565 0.638 0.643 0.561 0.639 0.644 0.009051 0.20188 5.83 6.161 5.02e-03 1.0
age_size -1.23e-04 1.000 1.000 1.000 0.567 0.629 0.633 0.568 0.632 0.634 0.005990 0.18848 5.63 5.214 2.53e-03 1.0
age -2.97e-03 0.996 0.997 0.998 0.513 0.628 0.643 0.513 0.628 0.644 0.004165 0.09238 5.27 2.525 1.55e-02 1.0
grade_nodes -1.31e-02 0.982 0.987 0.992 0.635 0.645 0.643 0.639 0.646 0.644 0.002028 -0.09046 4.95 -2.532 -2.68e-03 1.0
grade_pgr 4.96e-05 1.000 1.000 1.000 0.541 0.633 0.635 0.537 0.636 0.637 0.004752 0.26528 4.83 7.348 1.01e-03 0.2
size_pgr 1.22e-06 1.000 1.000 1.000 0.490 0.633 0.635 0.494 0.635 0.637 0.002102 0.01627 3.82 0.452 1.62e-03 0.2
meno_nodes -2.80e-03 0.996 0.997 0.999 0.580 0.635 0.635 0.584 0.637 0.637 0.001735 0.00739 3.49 0.205 -5.37e-04 0.2
meno_pgr 7.13e-05 1.000 1.000 1.000 0.527 0.634 0.635 0.522 0.636 0.637 0.002519 0.06788 3.30 1.862 4.64e-04 0.2
age_grade -8.79e-05 1.000 1.000 1.000 0.508 0.632 0.635 0.509 0.634 0.637 0.000891 0.07139 2.42 1.951 2.66e-03 0.2

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events within 5 years.

# Baseline hazard h0: crude event rate inside the 5-year risk window.
timeinterval <- 5 # Five years

# Events observed within the window ...
windowEvents <- sum(dataBrestCancerTrain$status &
                      dataBrestCancerTrain$time <= timeinterval)
# ... over subjects followed past the window or with an observed event
informative <- sum((dataBrestCancerTrain$time > timeinterval) |
                     (dataBrestCancerTrain$status == 1))
h0 <- windowEvents / informative

pander::pander(t(c(h0 = h0, timeinterval = timeinterval)),
               caption = "Initial Parameters")
Initial Parameters
h0 timeinterval
0.429 5
# Event probability from the Cox linear predictor, assuming a Poisson
# process with baseline rate h0 (ppoisGzero = P(at least one event))
index <- predict(ml,dataBrestCancerTrain)
rdata <- cbind(dataBrestCancerTrain$status,ppoisGzero(index,h0))

# Risk-stratified evaluation at the 90% and 80% specificity operating points
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event

# Keep subjects with an event, plus censored subjects followed for at
# least twice the risk window (short-follow-up censored cases excluded)
toinclude <- rdata[,1]==1 | dataBrestCancerTrain$time > 2.0*timeinterval
obstiemToEvent <- dataBrestCancerTrain[toinclude,"time"]  # observed times (years)
tmin<-min(obstiemToEvent)
sum(toinclude)

[1] 1958

# Model-expected mean time to event vs. observed time, compared with a
# no-intercept linear fit (slope of 1.0 would indicate perfect calibration)
timetoEvent <- meanTimeToEvent(rdata[toinclude,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent~0+timetoEvent)
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent 0.841 0.014 60.2 0
Fitting linear model: obstiemToEvent ~ 0 + timetoEvent
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1958 4.04 0.649 0.649
# Expected vs. observed time on log-log axes; events plotted in red
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[toinclude,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
# Overlay the fitted regression line and annotate fit statistics
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
text(tmin+0.005*(tmax-tmin),tmax,sprintf("R.sq=%4.3f",sm$r.squared),cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

# Mean absolute deviation between expected and observed times (years)
MADerror2 <- mean(abs(timetoEvent-obstiemToEvent))
pander::pander(MADerror2)

3.41

The time vs. event estimates are not calibrated. Let's do the calibration.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.457 0.386 0.345 0.215 1.88e-01 0.4997
RR 1.696 1.723 1.787 2.542 7.67e+01 1.6980
RR_LCI 1.591 1.612 1.662 1.967 1.59e-01 1.5924
RR_UCI 1.807 1.841 1.921 3.285 3.70e+04 1.8107
SEN 0.301 0.466 0.590 0.969 1.00e+00 0.2411
SPE 0.900 0.798 0.704 0.121 1.02e-02 0.9290
BACC 0.600 0.632 0.647 0.545 5.05e-01 0.5850
NetBenefit 0.112 0.175 0.223 0.375 3.96e-01 0.0879
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.13 1.07 1.19 4.69e-06
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.16 1.16 1.15 1.17
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.35 1.35 1.35 1.36
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.677 0.677 0.663 0.691
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.676 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.301 0.278 0.325
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.457 0.386
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 477.549428 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1980 811 1143 96.4 395.1
class=1 398 250 179 28.5 32.4
class=2 604 457 197 345.3 401.9

Cox Calibration

# Save graphics state so downstream chunks can restore it with par(op)
op <- par(no.readonly = TRUE)


# Calibrate the Cox model probabilities against the observed outcomes
calprob <- CoxRiskCalibration(ml,dataBrestCancerTrain,"status","time")


# Report the calibrated baseline hazard, hazard gain, and time interval
pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.698 1.35 6.96

The RRplot() of the calibrated model

# Replace the crude h0/time window with the calibrated values
h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

# Re-evaluate using the calibrated per-subject probabilities
rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event after calibration

# Recompute the expected-vs-observed fit with the calibrated probabilities
# (reuses the toinclude mask and obstiemToEvent from the uncalibrated run)
timetoEvent <- meanTimeToEvent(rdata[toinclude,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent~0+timetoEvent)
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent 0.983 0.0163 60.2 0
Fitting linear model: obstiemToEvent ~ 0 + timetoEvent
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1958 4.04 0.649 0.649
# Expected vs. observed time (calibrated model); events plotted in red
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[toinclude,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
# Overlay the fitted regression line and annotate fit statistics
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
text(tmin+0.005*(tmax-tmin),tmax,sprintf("R.sq=%4.3f",sm$r.squared),cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

# Append the calibrated MAD so both (uncalibrated, calibrated) are reported
MADerror2 <- c(MADerror2,mean(abs(timetoEvent-obstiemToEvent)))
pander::pander(MADerror2)

3.41 and 3.25

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6302 0.548 0.498 0.325 2.88e-01 0.500
RR 1.6958 1.723 1.787 2.542 7.67e+01 1.763
RR_LCI 1.5912 1.612 1.662 1.967 1.59e-01 1.640
RR_UCI 1.8072 1.841 1.921 3.285 3.70e+04 1.894
SEN 0.3011 0.466 0.590 0.969 1.00e+00 0.582
SPE 0.8996 0.798 0.704 0.121 1.02e-02 0.704
BACC 0.6003 0.632 0.647 0.545 5.05e-01 0.643
NetBenefit 0.0693 0.117 0.156 0.286 3.13e-01 0.151
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.997 0.947 1.05 0.908
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1 1 0.995 1.01
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.01 1.01 1.01 1.01
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.677 0.677 0.661 0.691
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.676 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.301 0.278 0.325
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.63 0.548
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 477.549428 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1980 811 1143 96.4 395.1
class=1 398 250 179 28.5 32.4
class=2 604 457 197 345.3 401.9

Performance on the external data set

# Evaluate the trained Cox model on the independent gbsg cohort
index <- predict(ml,dataBrestCancerTest)
pp <- predictionStats_binary(cbind(dataBrestCancerTest$status,index),plotname="Breast Cancer")

par(op)


# Event probabilities with the training-derived (calibrated) h0, evaluated
# at the probability thresholds learned on the training data
prob <- ppoisGzero(index,h0)
rdata <- cbind(dataBrestCancerTest$status,prob)
rrCoxTestAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

External Data Report

pander::pander(t(rrCoxTestAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.63 @:0.548 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6288 0.5475 0.5148 0.346 2.77e-01 0.4991
RR 1.8307 1.6248 1.6788 3.108 2.20e+01 1.5992
RR_LCI 1.5650 1.3787 1.4164 1.630 4.75e-02 1.3472
RR_UCI 2.1414 1.9149 1.9898 5.925 1.02e+04 1.8983
SEN 0.3445 0.4649 0.5485 0.973 1.00e+00 0.5585
SPE 0.8708 0.7416 0.6796 0.119 1.29e-02 0.6486
BACC 0.6076 0.6032 0.6140 0.546 5.06e-01 0.6036
NetBenefit 0.0267 0.0263 0.0473 0.162 2.23e-01 0.0459
pander::pander(t(rrCoxTestAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.32 1.17 1.48 4.46e-06
pander::pander(rrCoxTestAnalysis$c.index,caption="C. Index")
  • C Index: 0.666

  • Dxy: 0.331

  • S.D.: 0.031

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 177175

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.666 0.664 0.633 0.695
pander::pander(t(rrCoxTestAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.663 0.622 0.703
pander::pander((rrCoxTestAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.341 0.288 0.398
pander::pander((rrCoxTestAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.871 0.833 0.903
pander::pander(t(rrCoxTestAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.63 0.548
pander::pander(rrCoxTestAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 85.405348 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 448 160 219.0 15.89 60.334
class=1 86 37 33.9 0.29 0.328
class=2 152 102 46.1 67.59 81.340

Calibrating the index on the test data

# Re-calibrate the Cox index directly on the external (test) cohort
calprob <- CoxRiskCalibration(ml,dataBrestCancerTest,"status","time")

# Report the test-set calibration parameters
pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.529 0.915 4.86
# Evaluate the test-set-calibrated probabilities at fresh operating points
rdata <- cbind(dataBrestCancerTest$status,calprob$prob)

rrAnalysis <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

After Calibration Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.5853 0.4824 0.422 0.275 2.18e-01 0.500
RR 1.7405 1.7093 1.679 3.108 2.20e+01 1.729
RR_LCI 1.4790 1.4550 1.416 1.630 4.75e-02 1.474
RR_UCI 2.0483 2.0079 1.990 5.925 1.02e+04 2.029
SEN 0.2676 0.4181 0.548 0.973 1.00e+00 0.388
SPE 0.8992 0.7984 0.680 0.119 1.29e-02 0.824
BACC 0.5834 0.6083 0.614 0.546 5.06e-01 0.606
NetBenefit 0.0364 0.0763 0.107 0.236 2.81e-01 0.070
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.22 1.09 1.37 0.000689
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.666

  • Dxy: 0.331

  • S.D.: 0.031

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 177175

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.666 0.665 0.635 0.695
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.663 0.622 0.703
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.264 0.215 0.318
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.865 0.927
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.586 0.482
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 80.478762 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 483 174 232.7 14.80 67.96
class=1 85 46 32.0 6.16 6.94
class=2 118 79 34.3 58.05 66.45

Logistic Model

Here we train a logistic model on the same data set

## Only label subjects that present the event within five years

# Keep subjects followed at least 5 years or with an observed event, then
# binarize the outcome: event before 5 years -> 1, otherwise 0.
# NOTE(review): the window here is time < 5 while the Cox h0 above used
# time <= timeinterval — confirm the intended boundary is exclusive.
dataBrestCancerR <- subset(dataBrestCancerTrain, time>=5 | status==1)
dataBrestCancerR$status <- dataBrestCancerR$status * (dataBrestCancerR$time < 5)
dataBrestCancerR$time <- NULL

# BSWiMS logistic model on the binarized 5-year outcome (1 loop, 5 repeats)
#ml <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=20,NumberofRepeats = 5)
mlog <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=1,NumberofRepeats = 5)

—–..

# Report the bagged-coefficient summary of the logistic model features
sm <- summary(mlog)
pander::pander(sm$coefficients)
  Estimate lower OR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
size_nodes 1.05e-03 1.001 1.001 1.001 0.669 0.571 0.668 0.627 0.500 0.628 0.11233 0.63654 17.86 18.870 0.128490 1
nodes 4.33e-02 1.040 1.044 1.048 0.676 0.634 0.690 0.639 0.621 0.662 0.07110 0.57106 14.13 16.179 0.040494 1
grade_nodes 1.50e-02 1.014 1.015 1.016 0.682 0.637 0.686 0.649 0.624 0.655 0.06580 0.54866 13.66 15.650 0.031087 1
age_nodes 1.06e-03 1.001 1.001 1.001 0.678 0.653 0.686 0.642 0.621 0.657 0.03346 0.21312 9.39 5.710 0.035896 1
size_grade 1.75e-03 1.001 1.002 1.002 0.632 0.682 0.686 0.626 0.646 0.655 0.01787 0.29411 6.74 7.728 0.008648 1
age_size 8.73e-05 1.000 1.000 1.000 0.608 0.682 0.686 0.577 0.649 0.657 0.01534 0.29152 6.41 7.652 0.007600 1
grade 2.27e-01 1.168 1.254 1.347 0.571 0.683 0.690 0.500 0.653 0.662 0.01340 0.19036 6.20 4.983 0.008461 1
age_meno -6.04e-03 0.992 0.994 0.996 0.571 0.676 0.686 0.500 0.645 0.657 0.00782 0.08057 4.76 2.337 0.012065 1
age_pgr -5.42e-06 1.000 1.000 1.000 0.571 0.686 0.686 0.500 0.656 0.657 0.00512 0.00745 4.11 0.194 0.000417 1
age_grade -1.65e-03 0.997 0.998 0.999 0.574 0.690 0.690 0.507 0.661 0.662 0.00454 0.11372 3.60 2.960 0.000315 1
meno_grade 1.02e-01 1.045 1.107 1.173 0.571 0.683 0.686 0.500 0.652 0.657 0.00425 0.20428 3.47 5.343 0.004441 1
nodes_hormon -1.38e-02 0.979 0.986 0.994 0.587 0.688 0.686 0.526 0.658 0.655 0.00280 0.45522 3.44 12.150 -0.002853 1
size 3.94e-03 1.002 1.004 1.006 0.611 0.693 0.690 0.618 0.663 0.662 0.00507 0.21050 3.42 5.600 -0.001075 1
meno_pgr 3.19e-04 1.000 1.000 1.001 0.571 0.687 0.686 0.500 0.657 0.657 0.00316 0.05977 3.35 1.558 -0.000429 1
pgr -1.07e-04 1.000 1.000 1.000 0.571 0.689 0.686 0.500 0.659 0.655 0.00257 0.19759 2.64 5.745 -0.004123 1
meno_nodes -2.60e-02 0.955 0.974 0.994 0.640 0.686 0.686 0.595 0.656 0.657 0.00264 -0.06329 2.59 -1.645 0.000631 1
grade_pgr -3.51e-05 1.000 1.000 1.000 0.571 0.669 0.668 0.500 0.627 0.628 0.00241 0.17471 2.55 5.058 0.001252 1
meno_size 2.34e-03 1.000 1.002 1.004 0.604 0.691 0.690 0.578 0.663 0.662 0.00185 0.10227 2.43 2.662 -0.001378 1

Logistic Model Performance

# Save graphics state so it can be restored after the RRPlot panels
op <- par(no.readonly = TRUE)


# Logistic predictions on the full (uncensored-label) training frame
cprob <- predict(mlog,dataBrestCancerTrain)

# Risk evaluation at the 90%/80% specificity rates, 5-year risk window
rdata <- cbind(dataBrestCancerTrain$status,cprob)
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5.0)

par(op)

Training Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.394 0.255 0.130969 0.500
RR 1.765 1.739 1.799 2.213 1.000000 1.773
RR_LCI 1.659 1.627 1.676 1.764 0.000000 1.665
RR_UCI 1.879 1.858 1.931 2.777 0.000000 1.888
SEN 0.327 0.470 0.566 0.962 1.000000 0.374
SPE 0.900 0.799 0.731 0.125 0.000683 0.874
BACC 0.613 0.635 0.648 0.543 0.500342 0.624
NetBenefit 0.108 0.165 0.202 0.342 0.435125 0.129
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.957 0.91 1.01 0.0901
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206582

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.667 0.694
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7

Results on the validation set using Logistic model

# Validate the logistic model on the external cohort, reusing the
# probability thresholds learned on the training data
pre <- predict(mlog,dataBrestCancerTest)
rdata <- cbind(dataBrestCancerTest$status,pre)

rrAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5)

par(op)

Validation Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.541 @:0.431 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.439 0.306 2.31e-01 0.4996
RR 1.792 1.702 1.756 2.678 2.20e+01 1.7318
RR_LCI 1.529 1.428 1.477 1.679 4.75e-02 1.4731
RR_UCI 2.100 2.029 2.088 4.271 1.02e+04 2.0360
SEN 0.395 0.595 0.579 0.950 1.00e+00 0.4482
SPE 0.832 0.638 0.669 0.181 1.29e-02 0.7804
BACC 0.613 0.617 0.624 0.565 5.06e-01 0.6143
NetBenefit 0.060 0.105 0.106 0.210 2.69e-01 0.0717
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.13 1 1.26 0.0428
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.669 0.638 0.7
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Logistic Model Poisson Calibration

# Calibrate the logistic probabilities with a Poisson risk model:
# input is (status, predicted probability, time-to-event)
riskdata <- cbind(dataBrestCancerTrain$status,predict(mlog,dataBrestCancerTrain,type="prob"),dataBrestCancerTrain$time)
calprob <- CalibrationProbPoissonRisk(riskdata)

# Report the calibrated baseline hazard, hazard gain, and time interval
pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Logistic Calibration Parameters")
h0 Gain DeltaTime
0.676 1.31 7.14
# Keep the calibrated interval and gain for the test-set adjustment below
timeinterval <- calprob$timeInterval;
gain <- calprob$hazardGain

# Evaluate the calibrated training probabilities
rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated logistic: training

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6395 0.521 0.480 0.319 0.167426 0.500
RR 1.7654 1.739 1.799 2.213 1.000000 1.759
RR_LCI 1.6587 1.627 1.676 1.764 0.000000 1.643
RR_UCI 1.8790 1.858 1.931 2.777 0.000000 1.882
SEN 0.3267 0.470 0.566 0.962 1.000000 0.507
SPE 0.8996 0.799 0.731 0.125 0.000683 0.774
BACC 0.6132 0.635 0.648 0.543 0.500342 0.641
NetBenefit 0.0789 0.132 0.166 0.288 0.410407 0.147
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.02 0.974 1.08 0.343
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206586

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.667 0.694
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.639 0.521
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7
# Apply the training-derived hazard gain to the test-set probabilities,
# then evaluate at the calibrated training thresholds
probLog <- predict(mlog,dataBrestCancerTest)
aprob <- adjustProb(probLog,gain)

rdata <- cbind(dataBrestCancerTest$status,aprob)
rrAnalysisTestLogistic <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated validation

pander::pander(t(rrAnalysisTestLogistic$keyPoints),caption="Threshold values")
Threshold values
  @:0.639 @:0.521 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.63882 0.5212 0.5294 0.379 2.90e-01 0.5001
RR 1.79193 1.7024 1.7562 2.678 2.20e+01 1.7026
RR_LCI 1.52914 1.4283 1.4771 1.679 4.75e-02 1.4179
RR_UCI 2.09988 2.0290 2.0880 4.271 1.02e+04 2.0446
SEN 0.39465 0.5953 0.5786 0.950 1.00e+00 0.6421
SPE 0.83204 0.6382 0.6693 0.181 1.29e-02 0.5866
BACC 0.61335 0.6168 0.6239 0.565 5.06e-01 0.6144
NetBenefit 0.00447 0.0374 0.0423 0.132 2.09e-01 0.0466
pander::pander(t(rrAnalysisTestLogistic$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.22 1.08 1.36 0.00101
pander::pander(rrAnalysisTestLogistic$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.669 0.637 0.699
pander::pander(t(rrAnalysisTestLogistic$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysisTestLogistic$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.639 0.521
pander::pander(rrAnalysisTestLogistic$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Comparing the COX and Logistic Models on the Independent Data

pander::pander(t(rrCoxTestAnalysis$OAcum95ci))
mean 50% 2.5% 97.5%
0.841 0.841 0.84 0.842
pander::pander(t(rrAnalysisTestLogistic$OAcum95ci))
mean 50% 2.5% 97.5%
0.791 0.791 0.791 0.792
pander::pander(t(rrCoxTestAnalysis$OE95ci))
mean 50% 2.5% 97.5%
1.09 1.09 1.06 1.12
pander::pander(t(rrAnalysisTestLogistic$OE95ci))
mean 50% 2.5% 97.5%
0.989 0.988 0.963 1.02
# Total observed events on the test set: upper bound for the axes below
maxobs <- sum(dataBrestCancerTest$status)

# Two side-by-side panels per figure
par(mfrow=c(1,2),cex=0.75)

# Cumulative predicted events vs. observed events (Cox solid, logistic dashed,
# gray dotted diagonal = ideal calibration)
plot(rrCoxTestAnalysis$CumulativeOvs[,1:2],type="l",lty=1,
     main="Cumulative Probability",
     xlab="Observed",
     ylab="Cumulative Probability",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$CumulativeOvs[,1:2],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray")
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)


# Difference between cumulative predicted risk and observed events
plot(rrCoxTestAnalysis$CumulativeOvs$Observed,
     rrCoxTestAnalysis$CumulativeOvs$Cumulative-
       rrCoxTestAnalysis$CumulativeOvs$Observed,
     main="Cumulative Risk Difference",
     xlab="Observed",
     ylab="Cumulative Risk - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$CumulativeOvs$Observed,
     rrAnalysisTestLogistic$CumulativeOvs$Cumulative-
       rrAnalysisTestLogistic$CumulativeOvs$Observed,
     lty=2,
     col="red")
legend("topleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

# Expected vs. observed events over follow-up time
plot(rrCoxTestAnalysis$OEData[,2:3],type="l",lty=1,
     main="Expected over Time",
     xlab="Observed",
     ylab="Expected",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$OEData[,2:3],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray")
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)

# Expected-minus-observed difference curves for both models
plot(rrCoxTestAnalysis$OEData$Observed,
     rrCoxTestAnalysis$OEData$Expected-
       rrCoxTestAnalysis$OEData$Observed,
     main="Expected vs Observed Difference",
     xlab="Observed",
     ylab="Cumulative - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$OEData$Observed,
     rrAnalysisTestLogistic$OEData$Expected-
       rrAnalysisTestLogistic$OEData$Observed,
     lty=2,col="red")

legend("bottomleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

# Restore the saved graphics parameters
par(op)